In [4]:
## 사전실행코드
import polars as pl
import plotly.io as pio
pio.renderers.default = "notebook_connected"
import plotly.express as px
df_spotify = (pl.read_csv("./universal_top_spotify_songs.csv", try_parse_dates = True,
null_values = [""])
.filter(pl.col('snapshot_date').dt.year() == 2024).sort('snapshot_date'))
df_spotify = (
df_spotify.with_columns(pl.when(pl.col('country').is_null() == True) ## country 열이 null이면
.then(pl.lit('WW')) ## WW로 변경
.otherwise(pl.col('country')).alias('country')) ## 아니면 원래대로
.drop_nulls()) ## 그 외 null이 들어간 행 삭제
key_levels = pl.Enum(["C", "C#", "D", "Eb", "E", "F", "F#", "G", "G#", "A", "Bb", "B"])
df_spotify = (df_spotify.with_columns(pl.col('key').cast(pl.String)
.replace(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"],
["C", "C#", "D", "Eb", "E", "F", "F#", "G", "G#", "A", "Bb", "B"]))
.with_columns(pl.col('key').cast(key_levels)).sort('key'))
df_spotify = (df_spotify.with_columns(pl.col('artists').str.split(', ')) ## ,를 기준으로 문자열을 분리
## 리스트의 첫 번째 아이템을 가져와서 main_vocal로 저장
.with_columns(pl.col('artists').list.get(0, null_on_oob = True).alias('main_vocal'),
pl.col('artists').list.tail(-1).alias('featuring')) ## 첫 번째 아이템을 제외한 나머지를featuring으로 저장
.with_columns(pl.when(pl.col('featuring').list.len() == 0) ## 리스트 길이가 0이면
.then(None) ## None으로 설정
.otherwise(pl.col('featuring')).name.keep())) ## 아니면 그대로 유지
import pycountry_convert as pc
def get_continent_name(nation_code: str) -> str:
if nation_code != 'WW':
continent_code = pc.country_alpha2_to_continent_code(nation_code)
else:
continent_code = 'WW'
continent_dict = {"NA": "North America","SA": "South America", "AS": "Asia", "AF": "Africa",
"OC": "Oceania", "EU": "Europe", "AQ": "Antarctica", "WW": "Global"}
return continent_dict[continent_code]
df_spotify = (df_spotify.with_columns(pl.col('country')
## 앞서 정의한 함수를 country 열에 적용
.map_elements(get_continent_name, return_dtype = pl.String).alias('continent')))
10.2 수치형 변수 간 상관관계 회귀분석하기¶
In [5]:
df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).corr()
Out[5]:
shape: (16, 16)
| daily_rank | daily_movement | weekly_movement | popularity | duration_ms | mode | time_signature | danceability | energy | loudness | speechiness | acousticness | instrumentalness | liveness | valence | tempo |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| 1.0 | -0.145062 | -0.264775 | -0.112801 | 0.036033 | 0.011965 | 0.027519 | -0.062886 | -0.042519 | -0.044374 | 0.004615 | 0.040069 | 0.013488 | 0.030228 | -0.057007 | 0.018175 |
| -0.145062 | 1.0 | 0.368973 | -0.156114 | 0.009213 | 0.009721 | 0.00442 | -0.017507 | -0.00345 | -0.018784 | 0.000933 | 0.000971 | 0.013345 | 0.004058 | -0.003661 | -0.002467 |
| -0.264775 | 0.368973 | 1.0 | -0.176558 | 0.009557 | 0.010423 | 0.008576 | -0.029831 | -0.00634 | -0.025519 | 0.019179 | -0.000584 | 0.008384 | 0.012274 | -0.007706 | 0.000593 |
| -0.112801 | -0.156114 | -0.176558 | 1.0 | 0.015487 | 0.103172 | -0.133768 | -0.033194 | 0.015687 | 0.136663 | -0.192961 | -0.111156 | -0.010133 | -0.056468 | -0.008356 | 0.001929 |
| 0.036033 | 0.009213 | 0.009557 | 0.015487 | 1.0 | 0.043542 | 0.033176 | -0.196576 | -0.119117 | -0.135113 | -0.115064 | 0.040664 | 0.02811 | -0.024323 | -0.200502 | -0.025502 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| 0.040069 | 0.000971 | -0.000584 | -0.111156 | 0.040664 | 0.034684 | -0.104765 | -0.230753 | -0.527654 | -0.435017 | -0.029584 | 1.0 | 0.017645 | -0.013343 | -0.156258 | -0.083366 |
| 0.013488 | 0.013345 | 0.008384 | -0.010133 | 0.02811 | 0.036304 | 0.013961 | -0.006804 | -0.080463 | -0.220426 | -0.05598 | 0.017645 | 1.0 | 0.006919 | -0.091602 | -0.006889 |
| 0.030228 | 0.004058 | 0.012274 | -0.056468 | -0.024323 | -0.002485 | 0.00932 | -0.112974 | 0.141553 | 0.056927 | 0.058599 | -0.013343 | 0.006919 | 1.0 | -0.003821 | 0.05631 |
| -0.057007 | -0.003661 | -0.007706 | -0.008356 | -0.200502 | -0.08638 | 0.012608 | 0.417625 | 0.358156 | 0.283396 | 0.043455 | -0.156258 | -0.091602 | -0.003821 | 1.0 | 0.031509 |
| 0.018175 | -0.002467 | 0.000593 | 0.001929 | -0.025502 | 0.022574 | -0.067961 | -0.183312 | 0.10058 | 0.048295 | 0.075679 | -0.083366 | -0.006889 | 0.05631 | 0.031509 | 1.0 |
In [7]:
fig = px.imshow((df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).corr().with_columns(pl.
all().round(1))),
y = df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).columns,
text_auto = True, aspect = "auto", color_continuous_scale = "RdBu_r")
fig.show()
In [8]:
(df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).corr()
.with_columns(index = pl.lit(pl.Series(df_spotify.select(pl.col(pl.Int64), pl.col(pl.Float64)).
columns)))
.unpivot(index = 'index')
.filter(pl.col('index') != pl.col('variable'))
.filter((pl.col('value') > 0.5) | (pl.col('value') < -0.5))
.sort('value', descending = True))
Out[8]:
shape: (4, 3)
| index | variable | value |
|---|---|---|
| str | str | f64 |
| "loudness" | "energy" | 0.724866 |
| "energy" | "loudness" | 0.724866 |
| "acousticness" | "energy" | -0.527654 |
| "energy" | "acousticness" | -0.527654 |
In [9]:
fig = px.scatter(df_spotify.sample(fraction = 0.1, seed = 123),
x = 'loudness', y = 'energy', trendline = 'ols', trendline_color_override = "red", opacity = 0.1,
range_y = [0, 1])
fig.show()
In [10]:
result = px.get_trendline_results(fig)
result.px_fit_results.iloc[0].summary()
Out[10]:
| Dep. Variable: | y | R-squared: | 0.528 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.528 |
| Method: | Least Squares | F-statistic: | 1.434e+05 |
| Date: | Sat, 06 Dec 2025 | Prob (F-statistic): | 0.00 |
| Time: | 19:03:05 | Log-Likelihood: | 1.0000e+05 |
| No. Observations: | 128158 | AIC: | -2.000e+05 |
| Df Residuals: | 128156 | BIC: | -2.000e+05 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.9488 | 0.001 | 1138.448 | 0.000 | 0.947 | 0.950 |
| x1 | 0.0460 | 0.000 | 378.679 | 0.000 | 0.046 | 0.046 |
| Omnibus: | 408.453 | Durbin-Watson: | 0.490 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 508.583 |
| Skew: | -0.058 | Prob(JB): | 3.65e-111 |
| Kurtosis: | 3.286 | Cond. No. | 18.8 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [11]:
fig = px.scatter(df_spotify.sample(fraction = 0.1, seed = 123),
x = 'acousticness', y = 'energy', trendline_color_override = "red", trendline = "ols",
opacity = 0.1)
fig.show()
In [12]:
result = px.get_trendline_results(fig)
result.px_fit_results.iloc[0].summary()
Out[12]:
| Dep. Variable: | y | R-squared: | 0.274 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.274 |
| Method: | Least Squares | F-statistic: | 4.839e+04 |
| Date: | Sat, 06 Dec 2025 | Prob (F-statistic): | 0.00 |
| Time: | 19:03:12 | Log-Likelihood: | 72408. |
| No. Observations: | 128158 | AIC: | -1.448e+05 |
| Df Residuals: | 128156 | BIC: | -1.448e+05 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.7473 | 0.001 | 1319.663 | 0.000 | 0.746 | 0.748 |
| x1 | -0.3441 | 0.002 | -219.967 | 0.000 | -0.347 | -0.341 |
| Omnibus: | 1380.436 | Durbin-Watson: | 0.984 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 1356.962 |
| Skew: | -0.231 | Prob(JB): | 2.18e-295 |
| Kurtosis: | 2.798 | Cond. No. | 4.38 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
10.3 스포티파이 데이터로 글로벌 인기도 파악하기¶
In [13]:
expr_1 = pl.col('name').unique().len().over('main_vocal')
df_spotify_EDA1 = (
pl.concat([
## 글로벌 메인보컬 Top 10 산출
(df_spotify.filter(pl.col('country') == 'WW').select(pl.col('main_vocal').alias('Global_Main_Vocal'),
expr_1.alias('Global_Songs')).unique().sort('Global_Songs', descending = True).head(10)),
## 우리나라 메인보컬 Top 10 산출
(df_spotify.filter(pl.col('country') == 'KR').select(pl.col('main_vocal').alias('KR_Main_Vocal'),
expr_1.alias('KR_Songs')).unique().sort('KR_Songs', descending = True).head(10)),
## 미국 메인보컬 Top 10 산출
(df_spotify.filter(pl.col('country') == 'US').select(pl.col('main_vocal').alias('US_Main_Vocal'),
expr_1.alias('US_Songs')).unique().sort('US_Songs', descending = True).head(10)),
## 영국 메인보컬 Top 10 산출
(df_spotify.filter(pl.col('country') == 'GB').select(pl.col('main_vocal').alias('GB_Main_Vocal'),
expr_1.alias('GB_Songs')).unique().sort('GB_Songs', descending = True).head(10))],
how = 'horizontal')
.with_columns(pl.int_range(1, 11).alias('rank'))
.select(pl.col('rank'), pl.all().exclude('rank')))
df_spotify_EDA1
Out[13]:
shape: (10, 9)
| rank | Global_Main_Vocal | Global_Songs | KR_Main_Vocal | KR_Songs | US_Main_Vocal | US_Songs | GB_Main_Vocal | GB_Songs |
|---|---|---|---|---|---|---|---|---|
| i64 | str | u32 | str | u32 | str | u32 | str | u32 |
| 1 | "Taylor Swift" | 36 | "Jimin" | 21 | "Taylor Swift" | 37 | "Taylor Swift" | 36 |
| 2 | "Kendrick Lamar" | 17 | "Lim Young Woong" | 19 | "Future" | 35 | "Oasis" | 29 |
| 3 | "Beyoncé" | 17 | "aespa" | 16 | "Beyoncé" | 23 | "Kanye West" | 20 |
| 4 | "Future" | 16 | "DAY6" | 16 | "Kendrick Lamar" | 20 | "Beyoncé" | 19 |
| 5 | "Sabrina Carpenter" | 15 | "NewJeans" | 15 | "Kanye West" | 18 | "Eminem" | 19 |
| 6 | "Kanye West" | 15 | "Jung Kook" | 13 | "Zach Bryan" | 18 | "Kendrick Lamar" | 16 |
| 7 | "Tyler" | 15 | "V" | 12 | "Ariana Grande" | 17 | "Ariana Grande" | 16 |
| 8 | "Eminem" | 14 | "LE SSERAFIM" | 11 | "Post Malone" | 17 | "Tyler" | 15 |
| 9 | "Ariana Grande" | 14 | "YANGHONGWON" | 10 | "Eminem" | 17 | "Sabrina Carpenter" | 15 |
| 10 | "Billie Eilish" | 11 | "Taylor Swift" | 10 | "Tyler" | 16 | "Charli xcx" | 14 |
In [14]:
fig = px.bar(
(df_spotify.filter(pl.col('country').is_in(["WW", "KR", "US", "GB"])) ## 대상 국가 필터링
.group_by('country', 'main_vocal') ## 국가와 메인보컬로 그룹화
.agg(pl.col('name').unique().len()) ## 노래 이름의 고윳값에 대한 개수 산출
.sort(['country', 'name'], descending = True) ## 결과를 국가와 노래 이름으로 정렬
.group_by('country', maintain_order = True) ## 결과를 국가명으로 다시 그룹화
.head(10)), ## 상위 10개만 선택
## X축은 main_vocal, Y축과 막대 텍스트는 name으로 설정
x = 'main_vocal', y = 'name', text = 'name',
facet_row = 'country', facet_row_spacing = 0.07, ## facet 설정
labels = {"main_vocal": "메인보컬", "name": "노래수"}) ## 축 라벨 설정
fig.update_xaxes(matches = None, showticklabels = True) ## X축 간의 매칭과 틱라벨을 제거
fig.show()
In [15]:
(df_spotify.filter(pl.col('country') == "WW", ## 글로벌만 필터링
pl.col('main_vocal') == "Taylor Swift") ## 테일러 스위프트만 필터링
.group_by(['main_vocal', 'name']) ## 메인보컬과 노래명으로 그룹화
.len('chart in days') ## 전체 개수 산출
.sort('chart in days', descending = True).head(10))
Out[15]:
shape: (10, 3)
| main_vocal | name | chart in days |
|---|---|---|
| str | str | u32 |
| "Taylor Swift" | "Cruel Summer" | 290 |
| "Taylor Swift" | "Fortnight (feat. Post Malone)" | 119 |
| "Taylor Swift" | "I Can Do It With a Broken Hear… | 76 |
| "Taylor Swift" | "Down Bad" | 32 |
| "Taylor Swift" | "Who’s Afraid of Little Old Me?" | 27 |
| "Taylor Swift" | "Guilty as Sin?" | 27 |
| "Taylor Swift" | "So Long, London" | 24 |
| "Taylor Swift" | "But Daddy I Love Him" | 24 |
| "Taylor Swift" | "My Boy Only Breaks His Favorit… | 24 |
| "Taylor Swift" | "Florida!!! (feat. Florence + T… | 16 |
In [16]:
(df_spotify.filter(pl.col('country') == "KR", pl.col('main_vocal') == "Jimin")
.group_by(['main_vocal', 'name']).len('chart in days').sort('chart in days', descending =
True).head(10))
Out[16]:
shape: (10, 3)
| main_vocal | name | chart in days |
|---|---|---|
| str | str | u32 |
| "Jimin" | "Closer Than This" | 353 |
| "Jimin" | "Like Crazy" | 352 |
| "Jimin" | "Set Me Free Pt.2" | 201 |
| "Jimin" | "Like Crazy (English Version)" | 201 |
| "Jimin" | "Alone" | 180 |
| "Jimin" | "Face-off" | 179 |
| "Jimin" | "Smeraldo Garden Marching Band … | 176 |
| "Jimin" | "Who" | 155 |
| "Jimin" | "Slow Dance (feat. Sofia Carson… | 155 |
| "Jimin" | "Be Mine" | 155 |
In [17]:
df_spotify_EDA2 = ( pl.concat([
(df_spotify.filter(pl.col('country') == "WW") ## 글로벌 차트만 필터링
.select(pl.col('name').alias('Global_Song'), ## 노래명 열 선택
pl.col('main_vocal').alias('Global_Vocal'), ## 메인보컬 열 선택
pl.col('name').len().over('name').alias('Global_Day')) ## 노래명별 노래 수 산출
## 고유 행만 산출 정렬 후 상위 10곡만 출력
.unique().sort('Global_Day', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "KR") ## 한국 차트만 필터링
.select(pl.col('name').alias('KR_Song'),
pl.col('main_vocal').alias('KR_Vocal'),
pl.col('name').len().over('name').alias('KR_Day'))
.unique().sort('KR_Day', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "US") ## 미국 차트만 필터링
.select(pl.col('name').alias('US_Song'),
pl.col('main_vocal').alias('US_Vocal'),
pl.col('name').len().over('name').alias('US_Day'))
.unique().sort('US_Day', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "GB")
.select(pl.col('name').alias('GB_Song'), ## 영국 차트만 핕터링
pl.col('main_vocal').alias('GB_Vocal'),
pl.col('name').len().over('name').alias('GB_Day'))
.unique().sort('GB_Day', descending = True).head(10))],
how = 'horizontal')
.with_columns(pl.int_range(1, 11).alias('rank')) ## 순위 열 생성
.select(pl.col('rank'), pl.all().exclude('rank')) ## 순위 열을 앞으로 재배치
)
df_spotify_EDA2
Out[17]:
shape: (10, 13)
| rank | Global_Song | Global_Vocal | Global_Day | KR_Song | KR_Vocal | KR_Day | US_Song | US_Vocal | US_Day | GB_Song | GB_Vocal | GB_Day |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i64 | str | str | u32 | str | str | u32 | str | str | u32 | str | str | u32 |
| 1 | "One Of The Girls (with JENNIE,… | "The Weeknd" | 341 | "Closer Than This" | "Jimin" | 353 | "Stick Season" | "Noah Kahan" | 326 | "Stick Season" | "Noah Kahan" | 327 |
| 2 | "I Wanna Be Yours" | "Arctic Monkeys" | 341 | "3D (feat. Jack Harlow)" | "Jung Kook" | 353 | "Lose Control" | "Teddy Swims" | 325 | "Lose Control" | "Teddy Swims" | 320 |
| 3 | "Lose Control" | "Teddy Swims" | 340 | "Seven (feat. Latto) (Explicit … | "Jung Kook" | 353 | "I Remember Everything (feat. K… | "Zach Bryan" | 322 | "Beautiful Things" | "Benson Boone" | 309 |
| 4 | "Beautiful Things" | "Benson Boone" | 328 | "Standing Next to You" | "Jung Kook" | 352 | "Something in the Orange" | "Zach Bryan" | 317 | "Mr. Brightside" | "The Killers" | 300 |
| 5 | "The Night We Met" | "Lord Huron" | 324 | "Like Crazy" | "Jimin" | 352 | "Beautiful Things" | "Benson Boone" | 313 | "Cruel Summer" | "Taylor Swift" | 272 |
| 6 | "Cruel Summer" | "Taylor Swift" | 290 | "Love Me Again" | "V" | 336 | "Last Night" | "Morgan Wallen" | 272 | "Scared To Start" | "Michael Marcagi" | 266 |
| 7 | "LUNA" | "Feid" | 276 | "Grain of Sand" | "Lim Young Woong" | 327 | "See You Again (feat. Kali Uchi… | "Tyler" | 263 | "Too Sweet" | "Hozier" | 244 |
| 8 | "End of Beginning" | "Djo" | 266 | "Do or Die" | "Lim Young Woong" | 322 | "Good Luck, Babe!" | "Chappell Roan" | 247 | "Unwritten" | "Natasha Bedingfield" | 243 |
| 9 | "we can't be friends (wait for … | "Ariana Grande" | 255 | "London Boy" | "Lim Young Woong" | 299 | "Espresso" | "Sabrina Carpenter" | 241 | "The Night We Met" | "Lord Huron" | 232 |
| 10 | "Too Sweet" | "Hozier" | 251 | "Polaroid" | "Lim Young Woong" | 282 | "A Bar Song (Tipsy)" | "Shaboozey" | 239 | "Good Luck, Babe!" | "Chappell Roan" | 229 |
In [18]:
(df_spotify_EDA2.style
.tab_header(title = "2024년 노래 Top 10") ## 표 제목 설정
.tab_stub(rowname_col = 'rank') ## 스텁 설정
## 스패너 설정
.tab_spanner("글로벌", ['Global_Song', 'Global_Vocal', 'Global_Day'])
.tab_spanner("한국", ['KR_Song', 'KR_Vocal', 'KR_Day'])
.tab_spanner("미국", ['US_Song', 'US_Vocal', 'US_Day'])
.tab_spanner("영국", ['GB_Song', 'GB_Vocal', 'GB_Day'])
## 열 정렬 설정
.cols_align(align = "center")
## 열 라벨 설정
.cols_label(Global_Song = "노래", Global_Vocal = "메인보컬", Global_Day = "차트일수",
KR_Song = "노래", KR_Vocal = "메인보컬", KR_Day = "차트일수",
US_Song = "노래", US_Vocal = "메인보컬", US_Day = "차트일수",
GB_Song = "노래", GB_Vocal = "메인보컬", GB_Day = "차트일수"))
Out[18]:
| 2024년 노래 Top 10 | ||||||||||||
| 글로벌 | 한국 | 미국 | 영국 | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | |
| 1 | One Of The Girls (with JENNIE, Lily Rose Depp) | The Weeknd | 341 | Closer Than This | Jimin | 353 | Stick Season | Noah Kahan | 326 | Stick Season | Noah Kahan | 327 |
| 2 | I Wanna Be Yours | Arctic Monkeys | 341 | 3D (feat. Jack Harlow) | Jung Kook | 353 | Lose Control | Teddy Swims | 325 | Lose Control | Teddy Swims | 320 |
| 3 | Lose Control | Teddy Swims | 340 | Seven (feat. Latto) (Explicit Ver.) | Jung Kook | 353 | I Remember Everything (feat. Kacey Musgraves) | Zach Bryan | 322 | Beautiful Things | Benson Boone | 309 |
| 4 | Beautiful Things | Benson Boone | 328 | Standing Next to You | Jung Kook | 352 | Something in the Orange | Zach Bryan | 317 | Mr. Brightside | The Killers | 300 |
| 5 | The Night We Met | Lord Huron | 324 | Like Crazy | Jimin | 352 | Beautiful Things | Benson Boone | 313 | Cruel Summer | Taylor Swift | 272 |
| 6 | Cruel Summer | Taylor Swift | 290 | Love Me Again | V | 336 | Last Night | Morgan Wallen | 272 | Scared To Start | Michael Marcagi | 266 |
| 7 | LUNA | Feid | 276 | Grain of Sand | Lim Young Woong | 327 | See You Again (feat. Kali Uchis) | Tyler | 263 | Too Sweet | Hozier | 244 |
| 8 | End of Beginning | Djo | 266 | Do or Die | Lim Young Woong | 322 | Good Luck, Babe! | Chappell Roan | 247 | Unwritten | Natasha Bedingfield | 243 |
| 9 | we can't be friends (wait for your love) | Ariana Grande | 255 | London Boy | Lim Young Woong | 299 | Espresso | Sabrina Carpenter | 241 | The Night We Met | Lord Huron | 232 |
| 10 | Too Sweet | Hozier | 251 | Polaroid | Lim Young Woong | 282 | A Bar Song (Tipsy) | Shaboozey | 239 | Good Luck, Babe! | Chappell Roan | 229 |
In [19]:
## 국가별, 메인보컬별, 노래별 차트일 수 붙이기
(pl.concat([
(df_spotify.filter(pl.col('country') == "WW", pl.col('daily_rank') == 1)
.group_by('name')
.agg(pl.col('main_vocal').first().alias('Global_Main_Vocal'),
pl.len().alias('Global_Chart_Days'))
.rename({"name": "Global_Song"}).sort('Global_Chart_Days', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "KR", pl.col('daily_rank') == 1)
.group_by('name')
.agg(pl.col('main_vocal').first().alias('KR_Main_Vocal'), pl.len().alias('KR_Chart_Days'))
.rename({"name": "KR_Song"}).sort('KR_Chart_Days', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "US", pl.col('daily_rank') == 1)
.group_by('name')
.agg(pl.col('main_vocal').first().alias('US_Main_Vocal'), pl.len().alias('US_Chart_Days'))
.rename({"name": "US_Song"}).sort('US_Chart_Days', descending = True).head(10)),
(df_spotify.filter(pl.col('country') == "GB", pl.col('daily_rank') == 1)
.group_by('name')
.agg(pl.col('main_vocal').first().alias('GB_Main_Vocal'), pl.len().alias('GB_Chart_Days'))
.rename({"name": "GB_Song"}).sort('GB_Chart_Days', descending = True).head(10))],
how = 'horizontal')
.with_columns(pl.int_range(1, 11).alias('rank')) ## 순위 열 만들기
.select(pl.col('rank'), pl.all().exclude('rank')).style ## 순위 열 순서 설정
.tab_header(title = "2024년 차트 1위 노래 Top 10") ## 표 제목 설정
.tab_stub(rowname_col = 'rank')
## 스패너 설정
.tab_spanner("글로벌", ['Global_Song', 'Global_Main_Vocal', 'Global_Chart_Days'])
.tab_spanner("한국", ['KR_Song', 'KR_Main_Vocal', 'KR_Chart_Days'])
.tab_spanner("미국", ['US_Song', 'US_Main_Vocal', 'US_Chart_Days'])
.tab_spanner("영국", ['GB_Song', 'GB_Main_Vocal', 'GB_Chart_Days'])
.cols_align(align = "center") ## 열 제목 정렬
## 열 라벨 설정
.cols_label(Global_Song = "노래", Global_Main_Vocal = "메인보컬", Global_Chart_Days = "차트일수",
KR_Song = "노래", KR_Main_Vocal = "메인보컬", KR_Chart_Days = "차트일수",
US_Song = "노래", US_Main_Vocal = "메인보컬", US_Chart_Days = "차트일수",
GB_Song = "노래", GB_Main_Vocal = "메인보컬", GB_Chart_Days = "차트일수"))
Out[19]:
| 2024년 차트 1위 노래 Top 10 | ||||||||||||
| 글로벌 | 한국 | 미국 | 영국 | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | 노래 | 메인보컬 | 차트일수 | |
| 1 | Die With A Smile | Lady Gaga | 95 | Like Crazy | Jimin | 170 | Not Like Us | Kendrick Lamar | 48 | Stick Season | Noah Kahan | 73 |
| 2 | Beautiful Things | Benson Boone | 36 | Who | Jimin | 155 | Taste | Sabrina Carpenter | 35 | Espresso | Sabrina Carpenter | 65 |
| 3 | Espresso | Sabrina Carpenter | 33 | Magnetic | ILLIT | 11 | CARNIVAL | Kanye West | 34 | Taste | Sabrina Carpenter | 59 |
| 4 | BIRDS OF A FEATHER | Billie Eilish | 24 | How Sweet | NewJeans | 9 | Please Please Please | Sabrina Carpenter | 27 | Please Please Please | Sabrina Carpenter | 23 |
| 5 | La Diabla | Xavi | 21 | Supernova | aespa | 7 | Lovin On Me | Jack Harlow | 23 | Last Christmas | Wham! | 20 |
| 6 | Who | Jimin | 20 | Supernatural | NewJeans | 1 | That’s So True | Gracie Abrams | 18 | Too Sweet | Hozier | 17 |
| 7 | APT. | ROSÉ | 19 | None | None | None | Too Sweet | Hozier | 17 | That’s So True | Gracie Abrams | 13 |
| 8 | Please Please Please | Sabrina Carpenter | 19 | None | None | None | Rockin' Around The Christmas Tree | Brenda Lee | 16 | BACKBONE | Chase & Status | 9 |
| 9 | i like the way you kiss me | Artemas | 13 | None | None | None | Die With A Smile | Lady Gaga | 16 | Sailor Song | Gigi Perez | 8 |
| 10 | MILLION DOLLAR BABY | Tommy Richman | 11 | None | None | None | Good Luck, Babe! | Chappell Roan | 13 | Beautiful Things | Benson Boone | 8 |
In [20]:
(df_spotify.filter(pl.col('country').is_in(["WW", "KR", "US", "GB"]), ## 국가 필터링
pl.col('daily_rank') == 1) ## 1위만 필터링
## 국가별 1위곡 수 산출
.select(pl.col('country'), pl.col('name').unique().len().over('country'))
.unique().sort('name', descending = True))
Out[20]:
shape: (4, 2)
| country | name |
|---|---|
| str | u32 |
| "US" | 35 |
| "GB" | 22 |
| "WW" | 21 |
| "KR" | 6 |
In [21]:
fig = px.line((df_spotify.
## 우리나라 1위곡들만 필터링
filter(pl.col('name').is_in(["Like Crazy", "Who", "Magnetic", "How Sweet", "Supernova",
"Supernatural"]),
pl.col('country') == 'KR')),
x = 'snapshot_date', y = 'daily_rank', color = 'name', line_dash = 'name',
labels = {"snapshot_date": "날짜", "daily_rank": "순위", "name": "노래"})
fig.update_yaxes(autorange = "reversed")
fig.show()
In [22]:
(df_spotify.filter(pl.col('name') == "APT.", pl.col('daily_rank') == 1)
.select(pl.col('continent'), pl.col('country').unique().len().over('continent').alias('NO.1'))
.unique().sort('NO.1', descending = True))
Out[22]:
shape: (5, 2)
| continent | NO.1 |
|---|---|
| str | u32 |
| "Asia" | 9 |
| "Oceania" | 2 |
| "North America" | 2 |
| "Europe" | 2 |
| "Global" | 1 |
In [23]:
(df_spotify.filter(pl.col('name') == "APT.", pl.col('daily_rank') == 1,
pl.col('continent') == "Global").select(pl.col('snapshot_date')))
Out[23]:
shape: (19, 1)
| snapshot_date |
|---|
| date |
| 2024-10-22 |
| 2024-10-23 |
| 2024-10-24 |
| 2024-10-25 |
| 2024-10-26 |
| … |
| 2024-11-05 |
| 2024-11-25 |
| 2024-11-26 |
| 2024-11-27 |
| 2024-12-09 |
In [24]:
(df_spotify.filter(pl.col('country') == "KR", pl.col('name') == "APT.")
.select(pl.col('daily_rank').min()))
Out[24]:
shape: (1, 1)
| daily_rank |
|---|
| i64 |
| 2 |
In [25]:
fig = px.line((df_spotify.filter(pl.col('country').is_in(["WW", "KR", "US", "GB"]),
pl.col('name') == "APT.")),
x = 'snapshot_date', y = 'daily_rank', color = 'country', line_dash = 'country')
fig.update_yaxes(autorange = "reversed")
fig.show()
In [26]:
df_spotify_EDA4 = (
df_spotify.filter(pl.col('name') == "APT.").select(pl.col('country'), pl.col('continent'),
pl.col('country').map_elements(
lambda x: pc.country_name_to_country_alpha3(pc.country_alpha2_to_country_name(x))
if x != "WW" else "WW", return_dtype = pl.String).alias('nation'),
(pl.col('country').map_elements(
lambda x: pc.country_alpha2_to_country_name(x) if x != "WW" else "WW",
return_dtype = pl.String).alias('nation_name')),
pl.col('popularity'), pl.col('daily_rank'), pl.col('name').len().over('country').
alias('chart_days'))
.group_by('nation').agg(pl.col('country').first(),
pl.col('nation_name').first(), pl.col('continent').first(),
pl.col('popularity').mean(), pl.col('daily_rank').mean(),
pl.col('chart_days').first()))
df_spotify_EDA4.sort('daily_rank')
Out[26]:
shape: (68, 7)
| nation | country | nation_name | continent | popularity | daily_rank | chart_days |
|---|---|---|---|---|---|---|
| str | str | str | str | f64 | f64 | u32 |
| "SGP" | "SG" | "Singapore" | "Asia" | 90.301587 | 1.238095 | 63 |
| "HKG" | "HK" | "Hong Kong" | "Asia" | 90.301587 | 1.301587 | 63 |
| "MYS" | "MY" | "Malaysia" | "Asia" | 90.301587 | 1.31746 | 63 |
| "TWN" | "TW" | "Taiwan, Province of China" | "Asia" | 90.301587 | 2.31746 | 63 |
| "ARE" | "AE" | "United Arab Emirates" | "Asia" | 90.301587 | 2.412698 | 63 |
| … | … | … | … | … | … | … |
| "PRY" | "PY" | "Paraguay" | "South America" | 86.742857 | 40.742857 | 35 |
| "COL" | "CO" | "Colombia" | "South America" | 93.485714 | 41.4 | 35 |
| "DOM" | "DO" | "Dominican Republic" | "North America" | 89.7 | 46.0 | 10 |
| "GTM" | "GT" | "Guatemala" | "North America" | 94.25 | 47.75 | 4 |
| "GRC" | "GR" | "Greece" | "Europe" | 91.0 | 49.0 | 2 |
In [27]:
df_spotify.select(pl.col('country').unique()).join(df_spotify_EDA4, on = 'country', how = "anti")
Out[27]:
shape: (5, 1)
| country |
|---|
| str |
| "AR" |
| "EG" |
| "BY" |
| "NG" |
| "UY" |
In [28]:
fig = px.choropleth(df_spotify_EDA4, locations = 'nation', color = 'popularity', scope = "world",
hover_name = 'nation_name', color_continuous_scale = "greens", width = 800, height = 600,
title = "로제의 APT. 인기도")
fig.show()
In [29]:
fig = px.choropleth(df_spotify_EDA4, locations = 'nation', color = 'daily_rank', scope = "world",
hover_name = 'nation_name', color_continuous_scale = "greens_r", width = 800, height = 600,
title = "로제의 APT. 평균 순위")
fig.show()